#----------------------------------------------
# Measuring Misperceptions: Limits of Party-Specific Stereotype Reports
# Orr and Huber, 2021, POQ
# Inputs: OrrHuber_POQ_Study2_clean.csv and publicly available national survey files
# Outputs: Replication of Study 2
# Figures 5, A1, A2, A3, A4, A5, A7, A8
# Tables A2, A3, A4, A5, A6, A7, A8
#----------------------------------------------


#----------------------------------------------
# Load Packages
#----------------------------------------------

library(readstata13)
library(dplyr)
library(ggplot2)
library(estimatr)
library(reshape2)
library(matrixStats)
library(xtable)


#----------------------------------------------
# Load Data
#----------------------------------------------

# Clean study 2 data is posted alongside this script

dat <- read.csv("OrrHuber_POQ_Study2_clean.csv", as.is = TRUE)


# National survey data is available online, and is not re-archived alongside this 
# script - see README for access instructions

cc <- read.dta13("CCES18_Common_OUTPUT_vv_topost.dta", nonint.factors = TRUE)
anes <- read.dta13("anes_timeseries_2016_Stata13.dta", nonint.factors = TRUE)
gss <- read.dta13("gss.dta", nonint.factors = TRUE)


#----------------------------------------------
# Estimate party + public traits from CCES data
#----------------------------------------------

cc$pid2 <- recode(cc$pid3, .default = NA_character_,
                  "Democrat" = "D", "Republican" = "R")

cc$faminc_new[cc$faminc_new == "Prefer not to say"] <- NA

party_cc <- group_by(cc[!is.na(cc$pid2),], pid2) %>% 
  summarize(city = weighted.mean(urbancity == "City", w = commonweight, na.rm = TRUE)*100,
            female = weighted.mean(gender == "Female", w = commonweight, na.rm = TRUE)*100,
            under40 = weighted.mean(birthyr >= 1978, w = commonweight, na.rm = TRUE)*100,
            lgb = weighted.mean(sexuality %in% c("Lesbian / gay woman", "Gay man", "Bisexual"), w = commonweight, na.rm = TRUE)*100,
            college = weighted.mean(educ %in% c("4-year", "Post-grad"), w = commonweight, na.rm = TRUE)*100,
            nonwhite = weighted.mean(race != "White", w = commonweight, na.rm = TRUE)*100,
            nonreligious = weighted.mean(!pew_religimp %in% c("Very important", "Somewhat important"), w = commonweight, na.rm = TRUE)*100,
            under_income = weighted.mean(as.numeric(faminc_new) < 13, w = commonweight, na.rm = TRUE)*100,
            union = weighted.mean(union == "Yes, I am currently a member of a labor union", w = commonweight, na.rm = TRUE)*100)

us_cc <-
  summarize(cc,
            city = weighted.mean(urbancity == "City", w = commonweight, na.rm = TRUE)*100,
            female = weighted.mean(gender == "Female", w = commonweight, na.rm = TRUE)*100,
            under40 = weighted.mean(birthyr >= 1978, w = commonweight, na.rm = TRUE)*100,
            lgb = weighted.mean(sexuality %in% c("Lesbian / gay woman", "Gay man", "Bisexual"), w = commonweight, na.rm = TRUE)*100,
            college = weighted.mean(educ %in% c("4-year", "Post-grad"), w = commonweight, na.rm = TRUE)*100,
            nonwhite = weighted.mean(race != "White", w = commonweight, na.rm = TRUE)*100,
            nonreligious = weighted.mean(!pew_religimp %in% c("Very important", "Somewhat important"), w = commonweight, na.rm = TRUE)*100,
            under_income = weighted.mean(as.numeric(faminc_new) < 13, w = commonweight, na.rm = TRUE)*100,
            union = weighted.mean(union == "Yes, I am currently a member of a labor union", w = commonweight, na.rm = TRUE)*100)


cces_rates <- data.frame(Democrats = unlist(party_cc[1, -1]),
                         Republicans = unlist(party_cc[2, -1]),
                         USPublic = unlist(us_cc),
                         Label = c("City", "Female", "Under 40", "LGB",  "College grad", "Non-White", 
                                   "Non-religious", "Under $200,000", "Union member"))


#----------------------------------------------
# Estimate party + public traits from ANES data
#----------------------------------------------

anes$pid2 <- recode(anes$V161158x, .default = NA_character_,
                  "1. Strong Democrat" = "D", "2. Not very strong Democract" = "D", "3. Independent-Democrat" = "D",
                  "7. Strong Republican" = "R", "6. Not very strong Republican" = "R", "5. Independent-Republican" = "R")

anes$urban <- anes$V167534
anes$urban[anes$urban %in% c("-8. Don't know", "-1. Inap")] <- NA
anes$gender <- anes$V161342
anes$gender[anes$gender =="-9. Refused"] <- NA
anes$age <- anes$V161267
anes$age[anes$age < 0] <- NA
anes$sexuality <- anes$V161511
anes$sexuality[anes$sexuality %in% c("-9. Refused", "-5. Interview breakoff (sufficient partial IW)")] <- NA
anes$degree <- anes$V161270
anes$degree[anes$degree %in% c("-9. Refused", "-8. Don't know (FTF only)")] <- NA
anes$white_nonhisp <- anes$V161310a == "1. Selected"
anes$white_nonhisp[anes$V161309 == "1. Yes"] <- FALSE
anes$white_nonhisp[anes$V161310a %in% c("-9. Refused question", "-8. Don't know race (FTF only)")] <- NA
anes$religious <- anes$V161241
anes$religious[anes$religious %in% c("-9. Refused question", "-8. Don't know (FTF only)")] <- NA
anes$income <- anes$V161360
anes$income[anes$income %in% c("-9. Refused", "-5. Interview breakoff (sufficient partial IW)")] <- NA
anes$union <- anes$V161303
anes$union[anes$union %in% c("-9. Refused", "-8. Don't know (FTF only)")] <- NA


party_anes <- group_by(anes[!is.na(anes$pid2),], pid2) %>% 
  summarize(city = weighted.mean(urban == "4. Urban", w = V160102, na.rm = TRUE)*100,
            female = weighted.mean(gender == "2. Female", w = V160102, na.rm = TRUE)*100,
            under40 = weighted.mean(age < 40, w = V160102, na.rm = TRUE)*100,
            lgb = weighted.mean(sexuality %in% c("2. Homosexual or gay (or lesbian)", "3. Bisexual"), w = V160102, na.rm = TRUE)*100,
            college = weighted.mean(degree %in% c("13. Bachelor's degree (for example: BA, AB, BS)", "14. Master's degree (for example: MA, MS, MENG, MED, MSW, MBA)",
                                                "15. Professional school degree (for example: MD, DDS, DVM, LLB, JD)",
                                                "16. Doctorate degree (for example: PHD, EDD)"), w = V160102, na.rm = TRUE)*100,
            nonwhite = weighted.mean(!white_nonhisp, w = V160102, na.rm = TRUE)*100,
            nonreligious = weighted.mean(!religious %in% c("1. Important"), w = V160102, na.rm = TRUE)*100,
            under_income = weighted.mean(!income %in% c("5. $175,000-249,999 ", "6. $250,000 or more"), w = V160102, na.rm = TRUE)*100, # Note ind income (cces is fam)
            union = weighted.mean(union %in% c("1. Respondent only", "2. Respondent and spouse/partner", 
                                               "3. Respondent and someone else"), w = V160102, na.rm = TRUE)*100)

us_anes <-
  summarize(anes,
            city = weighted.mean(urban == "4. Urban", w = V160102, na.rm = TRUE)*100,
            female = weighted.mean(gender == "2. Female", w = V160102, na.rm = TRUE)*100,
            under40 = weighted.mean(age < 40, w = V160102, na.rm = TRUE)*100,
            lgb = weighted.mean(sexuality %in% c("2. Homosexual or gay (or lesbian)", "3. Bisexual"), w = V160102, na.rm = TRUE)*100,
            college = weighted.mean(degree %in% c("13. Bachelor's degree (for example: BA, AB, BS)", "14. Master's degree (for example: MA, MS, MENG, MED, MSW, MBA)",
                                                "15. Professional school degree (for example: MD, DDS, DVM, LLB, JD)",
                                                "16. Doctorate degree (for example: PHD, EDD)"), w = V160102, na.rm = TRUE)*100,
            nonwhite = weighted.mean(!white_nonhisp, w = V160102, na.rm = TRUE)*100,
            nonreligious = weighted.mean(!religious %in% c("1. Important"), w = V160102, na.rm = TRUE)*100,
            under_income = weighted.mean(!income %in% c("5. $175,000-249,999 ", "6. $250,000 or more"), w = V160102, na.rm = TRUE)*100, # Note ind income (cces is fam)
            union = weighted.mean(union %in% c("1. Respondent only", "2. Respondent and spouse/partner", 
                                               "3. Respondent and someone else"), w = V160102, na.rm = TRUE)*100)


anes_rates <- data.frame(Democrats = unlist(party_anes[1, -1]),
                         Republicans = unlist(party_anes[2, -1]),
                         USPublic = unlist(us_anes),
                         Label = c("City", "Female", "Under 40", "LGB",  "College grad", "Non-White", 
                                   "Non-religious", "Under $200,000", "Union member"))


#----------------------------------------------
# Estimate party + public traits from GSS data
#----------------------------------------------

gss$pid2 <- recode(gss$partyid, .default = NA_character_,
                  "Ind,near dem" = "D", "Not str democrat" = "D", "Strong democrat" = "D", 
                  "Ind,near rep" = "R", "Not str republican" = "R", "Strong republican" = "R")
gss$age[gss$age == "89 or older"] <- "89"
gss$age <- as.numeric(gss$age)
gss$sexornt[gss$sexornt %in% c("No answer", "Not applicable", "Dont know")] <- NA
gss$degree[gss$degree == "No answer"] <- NA
gss$relpersn[gss$relpersn  %in% c("No answer", "Dont know")] <- NA
gss$union[gss$union %in% c("No answer", "Not applicable", "Don't know")] <- NA
gss$white_nonhis <- gss$race == "White"
gss$white_nonhis[gss$hispanic != 1] <- FALSE


party_gss <- group_by(gss[!is.na(gss$pid2) & gss$year == 2018,], pid2) %>% 
  summarize(city = NA,
            female = weighted.mean(sex == "Female", w = wtssall, na.rm = TRUE)*100,
            under40 = weighted.mean(age < 40, w = wtssall, na.rm = TRUE)*100,
            lgb = weighted.mean(sexornt %in% c("Gay, lesbian, or homosexual", "Bisexual"), w = wtssall, na.rm = TRUE)*100,
            college = weighted.mean(degree %in% c("Bachelor", "Graduate"), w = wtssall, na.rm = TRUE)*100,
            nonwhite = weighted.mean(!white_nonhis, w = wtssall, na.rm = TRUE)*100,
            nonreligious = weighted.mean(!relpersn %in% c("Very religious", "Modrte religious"), w = wtssall, na.rm = TRUE)*100, # Religiositry not importance
            under_income = NA,
            union = weighted.mean(union_ %in% c("R and spouse belong", "R belongs"), w = wtssall, na.rm = TRUE)*100)

us_gss <-
  summarize(gss[gss$year == 2018,],
            city = NA,
            female = weighted.mean(sex == "Female", w = wtssall, na.rm = TRUE)*100,
            under40 = weighted.mean(age < 40, w = wtssall, na.rm = TRUE)*100,
            lgb = weighted.mean(sexornt %in% c("Gay, lesbian, or homosexual", "Bisexual"), w = wtssall, na.rm = TRUE)*100,
            college = weighted.mean(degree %in% c("Bachelor", "Graduate"), w = wtssall, na.rm = TRUE)*100,
            nonwhite = weighted.mean(!white_nonhis, w = wtssall, na.rm = TRUE)*100,
            nonreligious = weighted.mean(!relpersn %in% c("Very religious", "Modrte religious"), w = wtssall, na.rm = TRUE)*100, # Religiositry not importance
            under_income = NA,
            union = weighted.mean(union_ %in% c("R and spouse belong", "R belongs"), w = wtssall, na.rm = TRUE)*100)

gss_rates <- data.frame(Democrats = unlist(party_gss[1, -1]),
                         Republicans = unlist(party_gss[2, -1]),
                         USPublic = unlist(us_gss),
                         Label = c("City", "Female", "Under 40", "LGB",  "College grad", "Non-White", 
                                   "Non-religious", "Under $200,000", "Union member"))


#----------------------------------------------
# Figure 5: Effect of vignette pid on perceived traits
#----------------------------------------------

dat$treat_pid <- factor(dat$treat_pid, levels = c("None", "D", "R"))

gaps_plotdat <- cbind(
  data.frame(rbind(
    summary(lm_robust(how_city ~ treat_pid, data = dat, weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_city ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_female ~ treat_pid, data = dat, weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_female ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_young ~ treat_pid, data = dat, weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_young ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_lgb ~ treat_pid, data = dat, weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_lgb ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_college ~ treat_pid, data = dat, weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_college ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_nonwhite ~ treat_pid, data = dat, weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_nonwhite ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_nonrel ~ treat_pid, data = dat, weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_nonrel ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_under200 ~ treat_pid, data = dat, weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_under200 ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_union ~ treat_pid, data = dat, weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_union ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw))$coefficients[3, 1:2],
    cbind(cces_rates$Democrats - cces_rates$USPublic, NA),
    cbind(cces_rates$Republicans - cces_rates$USPublic, NA),
    cbind(cces_rates$Democrats - cces_rates$Republicans, NA)
  )),
  Comparison = c(rep(c("D - US", "R - US", "D - R"), 9), rep("D - US", 9), rep("R - US", 9), rep("D - R", 9)),
  Area = factor(c(rep(as.character(cces_rates$Label), each = 3), rep(as.character(cces_rates$Label), 3)), 
                levels = cces_rates$Label[order(cces_rates$Democrats - cces_rates$Republicans)]),
  Source = c(rep("Average Estimate", 27), rep("Approximate Truth", 27))
)

gaps_plotdat$ComparisonUS <- "Party vs Party"
gaps_plotdat$ComparisonUS[grepl("US", gaps_plotdat$Comparison)] <- "Party vs US Public"

ggplot(gaps_plotdat, aes(y = Estimate, x = Area, color = Comparison, shape = Source)) + 
  geom_point(position = position_dodge(0.3)) +
  geom_errorbar(aes(ymin = Estimate - 1.96*Std..Error, 
                    ymax = Estimate + 1.96*Std..Error),
                    #linetype = Comparison), 
                width = 0, position = position_dodge(0.3)) +
  scale_color_manual(values = c("black", "darkblue", "firebrick1")) +
  geom_hline(yintercept = 0, color = "darkgrey", linetype = "dashed") +
  facet_wrap(vars(ComparisonUS), scales = "free_x") + 
  labs(x = "", y = "Difference") +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "bottom", legend.box = "vertical", legend.margin = margin(),
        strip.background = element_rect(fill = "white"))
ggsave(file = "Figure5.tiff", height = 6, width = 8.5)


#----------------------------------------------
# Table A2: Study 2 Survey Sample
#----------------------------------------------

round(prop.table(table(dat$gender, useNA = "always")), 2)

median(dat$age, na.rm = TRUE)

round(prop.table(table(dat$race, useNA = "always")), 2)

median(dat$hhincome, na.rm = TRUE)

round(prop.table(table(dat$ideo, useNA = "always")), 2)

round(prop.table(table(dat$pid3, useNA = "always")), 2)


#----------------------------------------------
# Tables A3 - A6: Average Rates and Estimates
#----------------------------------------------

# Base rates

round(cces_rates[,-4], 2)
round(anes_rates[,-4], 2)
round(gss_rates[,-4], 2)

# Average estimates

lucid_howrates <- group_by(dat, treat_pid) %>% 
  summarize(city = weighted.mean(how_city, w = treat_ipw, na.rm = T),
            female = weighted.mean(how_female, w = treat_ipw, na.rm = T),
            under40 = weighted.mean(how_young, w = treat_ipw, na.rm = T), 
            lbg = weighted.mean(how_lgb, w = treat_ipw, na.rm = T),
            college = weighted.mean(how_college, w = treat_ipw, na.rm = T),
            nonwhite = weighted.mean(how_nonwhite, w = treat_ipw, na.rm = T), 
            nonreligious = weighted.mean(how_nonrel, w = treat_ipw, na.rm = T),
            high_income = weighted.mean(how_under200, w = treat_ipw, na.rm = T),
            union = weighted.mean(how_union, w = treat_ipw, na.rm = T)) %>% 
  t() %>% 
  apply(2, function(x) round(as.numeric(x), 2))
lucid_howrates <- data.frame(lucid_howrates[-1, c(2, 3, 1)])
colnames(lucid_howrates) <- c("Democrats", "Republicans", "USPublic")
row.names(lucid_howrates) <- row.names(cces_rates)
lucid_howrates$Label <- cces_rates$Label

lucid_howrates


#----------------------------------------------
# Figure A1: Average estimates by respondent pid
#----------------------------------------------

howrates_bypid_plot <- group_by(dat, treat_pid, pid3) %>% 
  summarize(city = mean(how_city, na.rm = T),
            female = mean(how_female, na.rm = T),
            under40 = mean(how_young, na.rm = T),
            lbg = mean(how_lgb, na.rm = T),
            college = mean(how_college, na.rm = T),
            nonwhite = mean(how_nonwhite, na.rm = T),
            nonreligious = mean(how_nonrel, na.rm = T),
            under_income = mean(how_under200, na.rm = T),
            union = mean(how_union, na.rm = T)) %>% 
  melt(id.vars = c("treat_pid", "pid3"), value.name = "Average",
       measure.vars = c("city", "female", "under40", "lbg", "college", "nonwhite",
                        "nonreligious", "under_income", "union")) %>% 
  rbind(data.frame(treat_pid = rep(c("D", "R", "None"), each = 9),
                   pid3 = "Approximate Truth",
                   variable = rep(rownames(cces_rates), 3),
                   Average = c(cces_rates$Democrats, cces_rates$Republicans, cces_rates$USPublic)))
howrates_bypid_plot$Label <- dplyr::recode(howrates_bypid_plot$variable, "city" = "City",
                                            "female" = "Female", "under40" = "Under 40", 
                                            "lbg" = "LGB", "lgb" = "LGB", "college" = "College grad", 
                                            "nonwhite" = "Non-White", 
                                            "nonreligious" = "Non-religious",
                                            "under_income" = "Under $200,000", "union" = "Union member")
howrates_bypid_plot$Label <- factor(howrates_bypid_plot$Label, 
                                    levels = c("Non-White", "Non-religious", "City", "Under 40", "LGB", "Female", 
                                               "College grad", "Union member", "Under $200,000")) 

howrates_bypid_plot[howrates_bypid_plot$pid3 != "I",] %>% 
  mutate(pid3 = recode(pid3, "R" = "Republican\nRespondents", "D" = "Democratic\nRespondents",
                       "Approximate Truth" = "Approximate\nTruth")) %>% 
ggplot(aes(x = Average, y = pid3, color = treat_pid, shape = treat_pid)) + 
  geom_point(size = 2) +
  facet_wrap(vars(Label)) +
  scale_color_manual(values = c("darkgrey", "darkblue", "firebrick1"),
                     name = "Vignette PID",
                     labels = c("Public", "D", "R")) +
  scale_shape_manual(values = c(15:17),
                     name = "Vignette PID",
                     labels = c("Public", "D", "R")) +
  labs(x = "Average Estimate", y = "") +
  theme_bw() +
  theme(strip.background = element_rect(fill = "white"),legend.position = "bottom")
ggsave(file = "FigureA1.pdf", height = 7, width = 8)


#----------------------------------------------
# Figure A2: Effect of vignette pid on perceived traits among strong partisans
#----------------------------------------------

sum(dat$pid_strength == "Strong")

gaps_plotdat_strongpid <- cbind(
  data.frame(rbind(
    summary(lm_robust(how_city ~ treat_pid, data = dat[dat$pid_strength == "Strong",], weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_city ~ relevel(treat_pid, ref = "R"), data = dat[dat$pid_strength == "Strong",], weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_female ~ treat_pid, data = dat[dat$pid_strength == "Strong",], weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_female ~ relevel(treat_pid, ref = "R"), data = dat[dat$pid_strength == "Strong",], weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_young ~ treat_pid, data = dat[dat$pid_strength == "Strong",], weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_young ~ relevel(treat_pid, ref = "R"), data = dat[dat$pid_strength == "Strong",], weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_lgb ~ treat_pid, data = dat[dat$pid_strength == "Strong",], weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_lgb ~ relevel(treat_pid, ref = "R"), data = dat[dat$pid_strength == "Strong",], weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_college ~ treat_pid, data = dat[dat$pid_strength == "Strong",], weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_college ~ relevel(treat_pid, ref = "R"), data = dat[dat$pid_strength == "Strong",], weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_nonwhite ~ treat_pid, data = dat[dat$pid_strength == "Strong",], weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_nonwhite ~ relevel(treat_pid, ref = "R"), data = dat[dat$pid_strength == "Strong",], weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_nonrel ~ treat_pid, data = dat[dat$pid_strength == "Strong",], weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_nonrel ~ relevel(treat_pid, ref = "R"), data = dat[dat$pid_strength == "Strong",], weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_under200 ~ treat_pid, data = dat[dat$pid_strength == "Strong",], weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_under200 ~ relevel(treat_pid, ref = "R"), data = dat[dat$pid_strength == "Strong",], weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_union ~ treat_pid, data = dat[dat$pid_strength == "Strong",], weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_union ~ relevel(treat_pid, ref = "R"), data = dat[dat$pid_strength == "Strong",], weights = treat_ipw))$coefficients[3, 1:2],
    cbind(cces_rates$Democrats - cces_rates$USPublic, NA),
    cbind(cces_rates$Republicans - cces_rates$USPublic, NA),
    cbind(cces_rates$Democrats - cces_rates$Republicans, NA)
  )),
  Comparison = c(rep(c("D - US", "R - US", "D - R"), 9), rep("D - US", 9), rep("R - US", 9), rep("D - R", 9)),
  Area = factor(c(rep(as.character(cces_rates$Label), each = 3), rep(as.character(cces_rates$Label), 3)), 
                levels = cces_rates$Label[order(cces_rates$Democrats - cces_rates$Republicans)]),
  Source = c(rep("Average Estimate", 27), rep("Approximate Truth", 27))
)

gaps_plotdat_strongpid$ComparisonUS <- "Party vs Party"
gaps_plotdat_strongpid$ComparisonUS[grepl("US", gaps_plotdat_strongpid$Comparison)] <- "Party vs US Public"


ggplot(gaps_plotdat_strongpid, aes(y = Estimate, x = Area, color = Comparison, shape = Source)) + 
  geom_point(position = position_dodge(0.3)) +
  geom_errorbar(aes(ymin = Estimate - 1.96*Std..Error, 
                    ymax = Estimate + 1.96*Std..Error), 
                width = 0, position = position_dodge(0.3)) +
  scale_color_manual(values = c("black", "darkblue", "firebrick1")) +
  geom_hline(yintercept = 0, color = "darkgrey", linetype = "dashed") +
  facet_wrap(vars(ComparisonUS), scales = "free_x") + 
  labs(x="") +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "bottom", legend.box = "vertical", legend.margin = margin(),
        strip.background = element_rect(fill = "white"))
ggsave(file = "FigureA2.pdf", height = 6, width = 8.5)


#----------------------------------------------
# Figure A3: Effect of vignette pid on perceived traits among out-partisans
#----------------------------------------------

sum(dat$treat3 %in% c("partisan_outpid", "public") & dat$pid3 %in% c("D", "R"))

gaps_plotdat_outpid <- cbind(
  data.frame(rbind(
    summary(lm_robust(how_city ~ treat_pid, data = dat[dat$treat3 %in% c("partisan_outpid", "public") & dat$pid3 == "R",]))$coefficients[2, 1:2],
    summary(lm_robust(how_city ~ treat_pid, data = dat[dat$treat3 %in% c("partisan_outpid", "public") & dat$pid3 == "D",]))$coefficients[2, 1:2],
    summary(lm_robust(how_city ~ relevel(treat_pid, ref = "R"), data = dat[dat$treat3 %in% c("partisan_outpid") & dat$pid3 %in% c("D", "R"),]))$coefficients[2, 1:2],
    summary(lm_robust(how_female ~ treat_pid, data = dat[dat$treat3 %in% c("partisan_outpid", "public") & dat$pid3 == "R",]))$coefficients[2, 1:2],
    summary(lm_robust(how_female ~ treat_pid, data = dat[dat$treat3 %in% c("partisan_outpid", "public") & dat$pid3 == "D",]))$coefficients[2, 1:2],
    summary(lm_robust(how_female ~ relevel(treat_pid, ref = "R"), data = dat[dat$treat3 %in% c("partisan_outpid") & dat$pid3 %in% c("D", "R"),]))$coefficients[2, 1:2],
    summary(lm_robust(how_young ~ treat_pid, data = dat[dat$treat3 %in% c("partisan_outpid", "public") & dat$pid3 == "R",]))$coefficients[2, 1:2],
    summary(lm_robust(how_young ~ treat_pid, data = dat[dat$treat3 %in% c("partisan_outpid", "public") & dat$pid3 == "D",]))$coefficients[2, 1:2],
    summary(lm_robust(how_young ~ relevel(treat_pid, ref = "R"), data = dat[dat$treat3 %in% c("partisan_outpid") & dat$pid3 %in% c("D", "R"),]))$coefficients[2, 1:2],
    summary(lm_robust(how_lgb ~ treat_pid, data = dat[dat$treat3 %in% c("partisan_outpid", "public") & dat$pid3 == "R",]))$coefficients[2, 1:2],
    summary(lm_robust(how_lgb ~ treat_pid, data = dat[dat$treat3 %in% c("partisan_outpid", "public") & dat$pid3 == "D",]))$coefficients[2, 1:2],
    summary(lm_robust(how_lgb ~ relevel(treat_pid, ref = "R"), data = dat[dat$treat3 %in% c("partisan_outpid") & dat$pid3 %in% c("D", "R"),]))$coefficients[2, 1:2],
    summary(lm_robust(how_college ~ treat_pid, data = dat[dat$treat3 %in% c("partisan_outpid", "public") & dat$pid3 == "R",]))$coefficients[2, 1:2],
    summary(lm_robust(how_college ~ treat_pid, data = dat[dat$treat3 %in% c("partisan_outpid", "public") & dat$pid3 == "D",]))$coefficients[2, 1:2],
    summary(lm_robust(how_college ~ relevel(treat_pid, ref = "R"), data = dat[dat$treat3 %in% c("partisan_outpid") & dat$pid3 %in% c("D", "R"),]))$coefficients[2, 1:2],
    summary(lm_robust(how_nonwhite ~ treat_pid, data = dat[dat$treat3 %in% c("partisan_outpid", "public") & dat$pid3 == "R",]))$coefficients[2, 1:2],
    summary(lm_robust(how_nonwhite ~ treat_pid, data = dat[dat$treat3 %in% c("partisan_outpid", "public") & dat$pid3 == "D",]))$coefficients[2, 1:2],
    summary(lm_robust(how_nonwhite ~ relevel(treat_pid, ref = "R"), data = dat[dat$treat3 %in% c("partisan_outpid") & dat$pid3 %in% c("D", "R"),]))$coefficients[2, 1:2],
    summary(lm_robust(how_nonrel ~ treat_pid, data = dat[dat$treat3 %in% c("partisan_outpid", "public") & dat$pid3 == "R",]))$coefficients[2, 1:2],
    summary(lm_robust(how_nonrel ~ treat_pid, data = dat[dat$treat3 %in% c("partisan_outpid", "public") & dat$pid3 == "D",]))$coefficients[2, 1:2],
    summary(lm_robust(how_nonrel ~ relevel(treat_pid, ref = "R"), data = dat[dat$treat3 %in% c("partisan_outpid") & dat$pid3 %in% c("D", "R"),]))$coefficients[2, 1:2],
    summary(lm_robust(how_under200 ~ treat_pid, data = dat[dat$treat3 %in% c("partisan_outpid", "public") & dat$pid3 == "R",]))$coefficients[2, 1:2],
    summary(lm_robust(how_under200 ~ treat_pid, data = dat[dat$treat3 %in% c("partisan_outpid", "public") & dat$pid3 == "D",]))$coefficients[2, 1:2],
    summary(lm_robust(how_under200 ~ relevel(treat_pid, ref = "R"), data = dat[dat$treat3 %in% c("partisan_outpid") & dat$pid3 %in% c("D", "R"),]))$coefficients[2, 1:2],
    summary(lm_robust(how_union ~ treat_pid, data = dat[dat$treat3 %in% c("partisan_outpid", "public") & dat$pid3 == "R",]))$coefficients[2, 1:2],
    summary(lm_robust(how_union ~ treat_pid, data = dat[dat$treat3 %in% c("partisan_outpid", "public") & dat$pid3 == "D",]))$coefficients[2, 1:2],
    summary(lm_robust(how_union ~ relevel(treat_pid, ref = "R"), data = dat[dat$treat3 %in% c("partisan_outpid") & dat$pid3 %in% c("D", "R"),]))$coefficients[2, 1:2],
    cbind(cces_rates$Democrats - cces_rates$USPublic, NA),
    cbind(cces_rates$Republicans - cces_rates$USPublic, NA),
    cbind(cces_rates$Democrats - cces_rates$Republicans, NA)
  )),
  Comparison = c(rep(c("D - US (R respondents)", "R - US (D respondents)", "D - R (out party respondents)"), 9), rep("D - US (R respondents)", 9), rep("R - US (D respondents)", 9), rep("D - R (out party respondents)", 9)),
  Area = factor(c(rep(as.character(cces_rates$Label), each = 3), rep(as.character(cces_rates$Label), 3)), 
                levels = cces_rates$Label[order(cces_rates$Democrats - cces_rates$Republicans)]),
  Source = c(rep("Average Estimate", 27), rep("Approximate Truth", 27))
)

gaps_plotdat_outpid$ComparisonUS <- "Party vs Party"
gaps_plotdat_outpid$ComparisonUS[grepl("US", gaps_plotdat_outpid$Comparison)] <- "Party vs US Public"


ggplot(gaps_plotdat_outpid, aes(y = Estimate, x = Area, color = Comparison, shape = Source)) + 
  geom_point(position = position_dodge(0.3)) +
  geom_errorbar(aes(ymin = Estimate - 1.96*Std..Error, 
                    ymax = Estimate + 1.96*Std..Error),
                width = 0, position = position_dodge(0.3)) +
  scale_color_manual(values = c("black", "darkblue", "firebrick1")) +
  geom_hline(yintercept = 0, color = "darkgrey", linetype = "dashed") +
  facet_wrap(vars(ComparisonUS), scales = "free_x") + 
  labs(x = "", y = "Difference") +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "bottom", legend.box = "vertical", legend.margin = margin(),
        strip.background = element_rect(fill = "white"))
ggsave(file = "FigureA3.pdf", height = 6, width = 8.5)


#----------------------------------------------
# Figure A4: Mean vs median perception differences
#----------------------------------------------

lucid_howrates_median <- group_by(dat, treat_pid) %>% 
  summarize(city = weightedMedian(how_city, w = treat_ipw, na.rm = T), 
            female = weightedMedian(how_female, w = treat_ipw, na.rm = T),
            under40 = weightedMedian(how_young, w = treat_ipw, na.rm = T),
            lbg = weightedMedian(how_lgb, w = treat_ipw, na.rm = T),
            college = weightedMedian(how_college, w = treat_ipw, na.rm = T),
            nonwhite = weightedMedian(how_nonwhite, w = treat_ipw, na.rm = T),
            nonreligious = weightedMedian(how_nonrel, w = treat_ipw, na.rm = T), 
            under_income = weightedMedian(how_under200, w = treat_ipw, na.rm = T), 
            union = weightedMedian(how_union, w = treat_ipw, na.rm = T)) %>%
  t() %>% 
  apply(2, function(x) round(as.numeric(x), 2))
lucid_howrates_median <- data.frame(lucid_howrates_median[-1, c(2, 3, 1)])
colnames(lucid_howrates_median) <- c("Democrats", "Republicans", "USPublic")
row.names(lucid_howrates_median) <- row.names(cces_rates)
lucid_howrates_median$Label <- cces_rates$Label

gaps_plotdat_median <- rbind(gaps_plotdat,
                               data.frame(
                                 Estimate = c(c(lucid_howrates_median$Democrats - lucid_howrates_median$USPublic), 
                                              c(lucid_howrates_median$Republicans - lucid_howrates_median$USPublic),
                                              c(lucid_howrates_median$Democrats - lucid_howrates_median$Republicans)),
                                 Std..Error = NA,
                                 Comparison = c(rep("D - US", 9), rep("R - US", 9), rep("D - R", 9)),
                                 Area = factor(rep(as.character(cces_rates$Label), 3), 
                                               levels = cces_rates$Label[order(cces_rates$Democrats - cces_rates$Republicans)]),
                                 Source = c(rep("Median Estimate", 27)),
                                 ComparisonUS = c(rep("Party vs US Public", 18), rep("Party vs Party", 9))
                               )
                             )

ggplot(gaps_plotdat_median, aes(y = Estimate, x = Area, color = Comparison, shape = Source)) + 
  geom_point(position = position_dodge(0.3)) +
  geom_errorbar(aes(ymin = Estimate - 1.96*Std..Error, 
                    ymax = Estimate + 1.96*Std..Error), 
                width = 0, position = position_dodge(0.3)) +
  scale_shape_manual(values = c(16, 17, 4)) +
  scale_color_manual(values = c("black", "darkblue", "firebrick1")) +
  geom_hline(yintercept = 0, color = "darkgrey", linetype = "dashed") +
  facet_wrap(vars(ComparisonUS), scales = "free_x") + 
  labs(x = "", y = "Difference") +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "bottom", legend.box = "vertical", legend.margin = margin(),
        strip.background = element_rect(fill = "white"))
ggsave(file = "FigureA4.pdf", height = 6, width = 8.5)


#----------------------------------------------
# Figure A5: Effect of vignette pid weighted to target U.S. population
#----------------------------------------------

gaps_plotdat_weighted <- cbind(
  data.frame(rbind(
    summary(lm_robust(how_city ~ treat_pid, data = dat, weights = treat_ipw_norm*weight_uspop))$coefficients[2:3, 1:2],
    summary(lm_robust(how_city ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw_norm*weight_uspop))$coefficients[3, 1:2],
    summary(lm_robust(how_female ~ treat_pid, data = dat, weights = treat_ipw_norm*weight_uspop))$coefficients[2:3, 1:2],
    summary(lm_robust(how_female ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw_norm*weight_uspop))$coefficients[3, 1:2],
    summary(lm_robust(how_young ~ treat_pid, data = dat, weights = treat_ipw_norm*weight_uspop))$coefficients[2:3, 1:2],
    summary(lm_robust(how_young ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw_norm*weight_uspop))$coefficients[3, 1:2],
    summary(lm_robust(how_lgb ~ treat_pid, data = dat, weights = treat_ipw_norm*weight_uspop))$coefficients[2:3, 1:2],
    summary(lm_robust(how_lgb ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw_norm*weight_uspop))$coefficients[3, 1:2],
    summary(lm_robust(how_college ~ treat_pid, data = dat, weights = treat_ipw_norm*weight_uspop))$coefficients[2:3, 1:2],
    summary(lm_robust(how_college ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw_norm*weight_uspop))$coefficients[3, 1:2],
    summary(lm_robust(how_nonwhite ~ treat_pid, data = dat, weights = treat_ipw_norm*weight_uspop))$coefficients[2:3, 1:2],
    summary(lm_robust(how_nonwhite ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw_norm*weight_uspop))$coefficients[3, 1:2],
    summary(lm_robust(how_nonrel ~ treat_pid, data = dat, weights = treat_ipw_norm*weight_uspop))$coefficients[2:3, 1:2],
    summary(lm_robust(how_nonrel ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw_norm*weight_uspop))$coefficients[3, 1:2],
    summary(lm_robust(how_under200 ~ treat_pid, data = dat, weights = treat_ipw_norm*weight_uspop))$coefficients[2:3, 1:2],
    summary(lm_robust(how_under200 ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw_norm*weight_uspop))$coefficients[3, 1:2],
    summary(lm_robust(how_union ~ treat_pid, data = dat, weights = treat_ipw_norm*weight_uspop))$coefficients[2:3, 1:2],
    summary(lm_robust(how_union ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw_norm*weight_uspop))$coefficients[3, 1:2],
    cbind(cces_rates$Democrats - cces_rates$USPublic, NA),
    cbind(cces_rates$Republicans - cces_rates$USPublic, NA),
    cbind(cces_rates$Democrats - cces_rates$Republicans, NA)
  )),
  Comparison = c(rep(c("D - US", "R - US", "D - R"), 9), rep("D - US", 9), rep("R - US", 9), rep("D - R", 9)),
  Area = factor(c(rep(as.character(cces_rates$Label), each = 3), rep(as.character(cces_rates$Label), 3)), 
                levels = cces_rates$Label[order(cces_rates$Democrats - cces_rates$Republicans)]),
  Source = c(rep("Average Estimate", 27), rep("Approximate Truth", 27))
)

gaps_plotdat_weighted$ComparisonUS <- "Party vs Party"
gaps_plotdat_weighted$ComparisonUS[grepl("US", gaps_plotdat_weighted$Comparison)] <- "Party vs US Public"


ggplot(gaps_plotdat_weighted, aes(y = Estimate, x = Area, color = Comparison, shape = Source)) + 
  geom_point(position = position_dodge(0.3)) +
  geom_errorbar(aes(ymin = Estimate - 1.96*Std..Error, 
                    ymax = Estimate + 1.96*Std..Error), 
                width = 0, position = position_dodge(0.3)) +
  scale_color_manual(values = c("black", "darkblue", "firebrick1")) +
  geom_hline(yintercept = 0, color = "darkgrey", linetype = "dashed") +
  facet_wrap(vars(ComparisonUS), scales = "free_x") + 
  labs(x = "", y = "Difference") +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "bottom", legend.box = "vertical", legend.margin = margin(),
        strip.background = element_rect(fill = "white"))
ggsave(file = "FigureA5.pdf", height = 6, width = 8.5)


#----------------------------------------------
# Figure A7: Effect of vignette pid relative to ANES and GSS baseline
#----------------------------------------------

gaps_plotdat_baseline <- cbind(
  data.frame(rbind(
    summary(lm_robust(how_city ~ treat_pid, data = dat, weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_city ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_female ~ treat_pid, data = dat, weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_female ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_young ~ treat_pid, data = dat, weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_young ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_lgb ~ treat_pid, data = dat, weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_lgb ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_college ~ treat_pid, data = dat, weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_college ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_nonwhite ~ treat_pid, data = dat, weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_nonwhite ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_nonrel ~ treat_pid, data = dat, weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_nonrel ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_under200 ~ treat_pid, data = dat, weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_under200 ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw))$coefficients[3, 1:2],
    summary(lm_robust(how_union ~ treat_pid, data = dat, weights = treat_ipw))$coefficients[2:3, 1:2],
    summary(lm_robust(how_union ~ relevel(treat_pid, ref = "R"), data = dat, weights = treat_ipw))$coefficients[3, 1:2],
    cbind(cces_rates$Democrats - cces_rates$USPublic, NA),
    cbind(cces_rates$Republicans - cces_rates$USPublic, NA),
    cbind(cces_rates$Democrats - cces_rates$Republicans, NA),
    cbind(anes_rates$Democrats - anes_rates$USPublic, NA),
    cbind(anes_rates$Republicans - anes_rates$USPublic, NA),
    cbind(anes_rates$Democrats - anes_rates$Republicans, NA),
    cbind(gss_rates$Democrats - gss_rates$USPublic, NA),
    cbind(gss_rates$Republicans - gss_rates$USPublic, NA),
    cbind(gss_rates$Democrats - gss_rates$Republicans, NA)
  )),
  Comparison = c(rep(c("D - US", "R - US", "D - R"), 9), rep(c(rep("D - US", 9), rep("R - US", 9), rep("D - R", 9)), 3)),
  Area = factor(c(rep(as.character(cces_rates$Label), each = 3), rep(as.character(cces_rates$Label), 9)), 
                levels = cces_rates$Label[order(cces_rates$Democrats - cces_rates$Republicans)]),
  Source = c(rep("Average Estimate", 27), rep("Approximate Truth (CCES)", 27), 
             rep("Approximate Truth (ANES)", 27), rep("Approximate Truth (GSS)", 27))
)

gaps_plotdat_baseline$ComparisonUS <- "Party vs Party"
gaps_plotdat_baseline$ComparisonUS[grepl("US", gaps_plotdat_baseline$Comparison)] <- "Party vs US Public"


ggplot(gaps_plotdat_baseline, aes(y = Estimate, x = Area, color = Comparison, shape = Source)) + 
  geom_point(position = position_dodge(0.3)) +
  geom_errorbar(aes(ymin = Estimate - 1.96*Std..Error, 
                    ymax = Estimate + 1.96*Std..Error),
                width = 0, position = position_dodge(0.3)) +
  scale_color_manual(values = c("black", "darkblue", "firebrick1")) +
  geom_hline(yintercept = 0, color = "darkgrey", linetype = "dashed") +
  facet_wrap(vars(ComparisonUS), scales = "free_x") + 
  labs(x = "", y = "Difference") +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "bottom", legend.box = "vertical", legend.margin = margin(),
        strip.background = element_rect(fill = "white"))
ggsave(file = "FigureA7.pdf", height = 6, width = 8.5)


#----------------------------------------------
# Figure A8: Ratio based estimator of vigette pid effect
#----------------------------------------------

set.seed(2021)

reps <- 1000
DoverR_boot <- data.frame(matrix(nrow = reps, ncol = nrow(lucid_howrates)))

for(i in 1:reps){
  dat_temp <- dat[sample(1:nrow(dat), replace = TRUE),]
  howrates_temp <- group_by(dat_temp, treat_pid) %>% 
  summarize(city = weighted.mean(how_city, w = treat_ipw, na.rm = T),
            female = weighted.mean(how_female, w = treat_ipw, na.rm = T),
            under40 = weighted.mean(how_young, w = treat_ipw, na.rm = T), 
            lbg = weighted.mean(how_lgb, w = treat_ipw, na.rm = T),
            college = weighted.mean(how_college, w = treat_ipw, na.rm = T),
            nonwhite = weighted.mean(how_nonwhite, w = treat_ipw, na.rm = T), 
            nonreligious = weighted.mean(how_nonrel, w = treat_ipw, na.rm = T),
            high_income = weighted.mean(how_under200, w = treat_ipw, na.rm = T),
            union = weighted.mean(how_union, w = treat_ipw, na.rm = T))
  DoverR_boot[i,] <- unname(howrates_temp[2, -1]/howrates_temp[3, -1])
}
DoverR_CI <- apply(DoverR_boot, 2, quantile, c(0.025, 0.975))

ratio_plotdat <- data.frame(DoverR = c(lucid_howrates$Democrats / lucid_howrates$Republicans, cces_rates$Democrats / cces_rates$Republicans,
                      anes_rates$Democrats / anes_rates$Republicans, gss_rates$Democrats / gss_rates$Republicans),
           DoverR_low = c(DoverR_CI[1,], rep(NA, 9*3)),
           DoverR_high = c(DoverR_CI[2,], rep(NA, 9*3)),
           Source = rep(c("Average Estimate", "Approximate Truth (CCES)", "Approximate Truth (ANES)", "Approximate Truth (GSS)"), each = nrow(cces_rates)),
           Label = factor(rep(lucid_howrates$Label, 2),
                          levels = cces_rates$Label[order(cces_rates$Democrats - cces_rates$Republicans)]))

ggplot(ratio_plotdat, aes(y = DoverR, x = Label, shape = Source)) + 
  geom_point(position = position_dodge(0.3)) +
  geom_errorbar(aes(ymin = DoverR_low, ymax = DoverR_high), width = 0, position = position_dodge(0.3)) +
  geom_hline(yintercept = 1, color = "darkgrey", linetype = "dashed") +
  labs(x = "", y = "Ratio D/R") +
  coord_flip() +
  guides(shape = guide_legend(nrow = 2)) +
  theme_bw() +
  theme(legend.position = "bottom", legend.box = "vertical", legend.margin = margin(),
        strip.background = element_rect(fill = "white"))
ggsave(file = "FigureA8.pdf", width = 6, height = 6)


#----------------------------------------------
# Tables A7 - A8: Difference in differences vs ratio
#----------------------------------------------

diff_in_diff <- gaps_plotdat$Estimate[gaps_plotdat$Comparison == "D - R" & gaps_plotdat$Source == "Approximate Truth"] -
  gaps_plotdat$Estimate[gaps_plotdat$Comparison == "D - R" & gaps_plotdat$Source == "Average Estimate"]

diff_in_ratio <- ratio_plotdat$DoverR[ratio_plotdat$Source == "Approximate Truth (CCES)"] -
  ratio_plotdat$DoverR[ratio_plotdat$Source == "Average Estimate"]

print(xtable(caption = "Difference in differences summary", label = "tab:diffindiff",
tibble(Area = gaps_plotdat$Area[gaps_plotdat$Comparison == "D - R" & gaps_plotdat$Source == "Average Estimate"],
       'D - R CCES' = gaps_plotdat$Estimate[gaps_plotdat$Comparison == "D - R" & gaps_plotdat$Source == "Approximate Truth"],
       'D - R Estimate (SE)' = paste0(round(gaps_plotdat$Estimate[gaps_plotdat$Comparison == "D - R" & gaps_plotdat$Source == "Average Estimate"], 2), 
                                     " (", round(gaps_plotdat$Std..Error[gaps_plotdat$Comparison == "D - R" & gaps_plotdat$Source == "Average Estimate"], 2), ")"),
       'Diff in diff' = diff_in_diff)
), include.rownames = F)

print(xtable(caption = "Difference in ratios summary", label = "tab:diffinratio",
tibble(Area = ratio_plotdat$Label[ratio_plotdat$Source == "Average Estimate"],
       'D / R CCES' = ratio_plotdat$DoverR[ratio_plotdat$Source == "Approximate Truth (CCES)"],
       'D / R Estimate (SE)' = paste0(round(ratio_plotdat$DoverR[ratio_plotdat$Source == "Average Estimate"], 2), " (", 
                                   round(apply(DoverR_boot, 2, sd), 2), ")"),
       'Diff in ratio' = diff_in_ratio)
), include.rownames = F)
